# !diagnostics off
library(readr)
library(magrittr)
library(sandwich)
library(xtable)
library(ggplot2)
library(lmtest)
library(haven)
library(Hmisc)
library(stargazer)
library(plm)
library(dplyr)
library(rdrobust)
library(descr)
library(ggthemes)
library(interplot)
library(mediation)
library(readxl)
library(broom)
library(tidyverse) 
library(logistf) 

## set working directory to the replicationdata folder
# setwd("~/Dropbox/Alliance/replicationdata")

########################
###### Load data #######
########################
 
reald  <- read_csv("base_allydat.csv")
#### Check data ####
nrow(reald) #274 okay
reald$X1= NULL
reald$violation = reald$violation %>% as.numeric()


#### Make variables ####
## post45 variable
reald= reald %>% mutate(
  post45= ifelse(begyr>1944, 1, 0),
  post45two= ifelse(begyr>1945,1,0),
  post45end= ifelse(end>1945, 1, 0)) 

 
## duration of alliance
reald= reald %>% mutate(
  end= ifelse(endyr ==0, 2011, endyr),
  duration= end- begyr+ 1 #problem: censoring
)
## code challenges only from inside
reald= reald %>% mutate(
  chalwithin= ifelse(challenged_inc_within==1 & challenged==0, 1, 0),
)
table(reald$chalwithin, useNA='ifany') #16 #okay

## bilateral alliances
reald= reald %>% mutate(
  bilat= ifelse(is.na(mem3)== T & is.na(mem2) ==F, 1, 0) 
)
reald$bilat %>% table #66 208
reald$multilat= ifelse(reald$bilat==0, 1, 0)
reald$multilat %>% table #ok

## how many alliances per member?
members = paste0('mem', 1:50)
membermat =reald [ ,members] %>% as.matrix()
# list of members coded within ATOP
unmembers= data.frame(allcols=union(membermat[,1:2], membermat[,3:43])) %>% unique
unmembers=  unmembers$allcols %>% as.numeric()
unique(unmembers) %>% length #181
 
## how many member per alliance?
# find how many members
store= c()
for (i in 1:nrow(membermat)){
  lv= membermat[i,] %>% as.vector()
  store[i] = max(which(!is.na(lv)))
  
}
# merge to reald
reald$nally= store
tapply(reald$nally, reald$post45, mean) #2.945946 3.674847 




###########################################
####### Expand Data Frame (reald_exp) #####
###########################################


##### Transform reald d into alliance-year dat (reald_exp)#####
### Expanding data
# indicator
reald$indic= 2
## make appropriate duplicates
# get actual end year
reald= reald %>% mutate(
  end= ifelse(endyr ==0, 2010, endyr)
)
# time 
reald= reald %>% mutate(
  duration= end- begyr+ 1,
  casenum= 1:nrow(reald)
)
# expand
reprow =rep(rownames(reald), reald$duration) %>% as.numeric()
reald_exp= reald[reprow, ]

colmatch= rep(row.names(reald), reald$duration) %>% as.data.frame()
names(colmatch)= "casenum"
reald_exp= merge(colmatch, reald, by= "casenum" )

#  get the years
#duplicate mark
reald_exp= reald_exp %>% group_by(casenum) %>% 
  mutate(dup= 1:n())
#transform to year
reald_exp= reald_exp %>% mutate(
  year= begyr+ dup -1
) 
reald_exp= reald_exp %>% arrange(year, atopid)
reald_exp$casenum = NULL
dim(reald_exp)
 

######################################################
##### Make into state-year data (reald_sy, ep_sy) ####
#######################################################


# first for reald_exp (alliance start year to end)
membermat =reald_exp [ ,c('atopid','year',members)] %>% as.matrix()
dim(membermat)  

## making state-year data 
reald_sy= c()
for (i in 1:nrow(membermat)) {
  yloop= combn(membermat[i,3:50], 1)
  yloop= t(yloop)
  yloop= na.omit(yloop)
  yloop= cbind(membermat[i, 'year'], yloop)
  yloop= cbind(membermat[i, 'atopid'], yloop)
  yloop= apply(yloop, 2, as.numeric)
  reald_sy= rbind(reald_sy, yloop)
}

reald_sy= reald_sy %>% as.data.frame()
colnames(reald_sy)= c('atopid', 'year', 'ccode1')
rownames(reald_sy) = NULL
dim(reald_sy) #18913    3 18593     3     3
summary(reald_sy)



##### Make into dyad-year data (reald_dyad) ####
# first for reald_exp
membermat =reald_exp [ ,c('atopid','year',members)] %>% as.matrix()
## making dyad data 
reald_dyad= c()
for (i in 1:nrow(membermat)) {
  yloop= combn(membermat[i,3:45], 2)
  yloop= t(yloop)
  yloop= na.omit(yloop)
  yloop= cbind(membermat[i, 'year'], yloop)
  yloop= cbind(membermat[i, 'atopid'], yloop)
  yloop= apply(yloop, 2, as.numeric)
  reald_dyad= rbind(reald_dyad, yloop)
}

reald_dyad= reald_dyad %>% as.data.frame()
colnames(reald_dyad)= c('atopid', 'year', 'ccode1', 'ccode2')
rownames(reald_dyad) = NULL 


##### Get CINC scores from MIDS directed dyad data ####

dyaddat= read.delim("directed.dat")
dim(dyaddat) #1573036     156
playd= dyaddat[ ,c('year', 'ccode1', 'ccode2', 'abbrev1', 'abbrev2',
                   'cap_1', 'cap_2', "milper_1", "milex_1",  "energy_1",
                   "irst_1",   "upop_1",   "tpop_1",   "milper_2", "milex_2",
                   "energy_2", "irst_2",   "upop_2",   "tpop_2",   "majpow1",
                   "majpow2",  "region1",  "region2" , "pol_rel",
                   's_un_glo', 's_wt_glo', 'cwnmmdal', 'flow1', 'flow2',
                   'polity21', 'polity22', 'contig')]
dim(playd) #1573036      32
subplayd= dyaddat[ ,c('year', 'ccode1', 'abbrev1','cap_1', "milper_1", "milex_1",  "energy_1",
                      "irst_1",   "upop_1",   "tpop_1",   "majpow1",
                      "region1",  'polity21')]
dim(subplayd) #1573036      13 okay

## merge above information with reald_sy data  

reald_syinfo= merge(reald_sy, subplayd, by= c('year', 'ccode1'),
                    all.x = T)
# get rid of duplicates
reald_syinfo= unique(reald_syinfo)
 

# cleaning missing values
reald_syinfo$milper_1[reald_syinfo$milper_1== -9] <-NA
reald_syinfo$milex_1[reald_syinfo$milex_1== -9] <-NA
reald_syinfo$energy_1[reald_syinfo$energy_1== -9] <-NA
reald_syinfo$irst_1[reald_syinfo$irst_1== -9] <-NA
reald_syinfo$upop_1[reald_syinfo$upop_1== -9] <-NA
reald_syinfo$tpop_1[reald_syinfo$tpop_1== -9] <-NA
reald_syinfo$polity21[reald_syinfo$polity21==-99] <- NA
summary(reald_syinfo)  

 

##############################
###### Make Episode Data #####
##############################
episode<- read.csv('episode2022.csv')

#### Manage Episode Data  ####
## make total challenged 
nrow(episode) #283
episode$X1= NULL
episode$violation = episode$violation %>% as.numeric()
table(episode$violation) 
 
episode$violreal = episode$violreal %>% as.numeric()
table(episode$violreal) 
 
episode$challreal = episode$challreal %>% as.numeric()
 
episode= episode %>% mutate(
  post45= ifelse(begyr>1944, 1, 0)) 

 
## duration of alliance
episode= episode %>% mutate(
  end= ifelse(endyr ==0, 2011, endyr),
  duration= end- begyr+ 1  
)

## episode bilat
episode= episode %>% mutate(
  bilat= ifelse(is.na(mem3)== T & is.na(mem2) ==F, 1, 0) 
)

###### Cases List ######

chall =episode %>% filter (challreal==1) #30 okay 
table = chall[,c('atopid', 'warname2', 'memo')] %>% as.matrix
made= xtable(table, digits=0)
print(made, include.rownames = F)
 

#####################################################
##### Coding in alliance challenges/ violations #####
#####################################################
  
reald_exp$violreal= episode$violreal[match(interaction(reald_exp$year, reald_exp$atopid),
                                           interaction(episode$waryear, episode$atopid))]
reald_exp$violreal %>% table(useNA = 'ifany')
episode$violreal %>% table(useNA = 'ifany')

 
reald_exp$challreal= episode$challreal[match(interaction(reald_exp$year, reald_exp$atopid),
                                             interaction(episode$waryear, episode$atopid))]
reald_exp$challreal %>% table(useNA = 'ifany')
episode$challreal %>% table(useNA = 'ifany')


reald_exp= reald_exp %>% mutate( 
  challreal= replace_na(challreal, 0)
)

#### Dummy for Honor and Viol 

reald_exp= reald_exp %>% mutate( 
  honored_real= ifelse(violreal==0, 1, 0)
)  

reald_exp= reald_exp %>% mutate( 
  hondummy_real= ifelse(violreal==0, 1, 0), 
  hondummy_real= replace_na(hondummy_real,0)
)
reald_exp= reald_exp %>% group_by(atopid) %>% mutate(
  everhonored= max(hondummy_real)
)
reald$hondummy_real = reald_exp$everhonored[match(reald$atopid, reald_exp$atopid)]
 

####### CINC Ratio of Def/Attacker (episode data) 
## pull in information from NMC playd 
 
episode$defender= episode$defender %>% as.numeric()
episode$attacker= episode$attacker %>% as.numeric()
episode$waryear= episode$waryear %>% as.numeric()

 
#code in CINC scores of defender and attacker
episode$defcinc <- episode$attcinc <-NA
episode$defcinc= playd$cap_1[match(interaction(episode$waryear, episode$defender),
                                   interaction(playd$year, playd$ccode1))]
episode$attcinc= playd$cap_1[match(interaction(episode$waryear, episode$attacker),
                                   interaction(playd$year, playd$ccode1))]
 
#CINC ratio
 # attacker/ (defender+attacker):  
episode$capratio2 = episode$defcinc/ (episode$defcinc+ episode$attcinc)
episode$capratio2= round(episode$capratio2, digits = 3)
summary(episode$capratio2)
 

#put it into real episode dat reald_exp
reald_exp$capratio2= episode$capratio2[match(interaction(reald_exp$year, reald_exp$atopid),
                                             interaction(episode$waryear, episode$atopid))]
 
# reald_exp$capratio2_real= ifelse(reald_exp$challenged==1, reald_exp$capratio2, NA)
# reald_exp$capratio2_real %>% table %>% sum

reald_exp$defcinc= episode$defcinc[match(interaction(reald_exp$year, reald_exp$atopid),
                                         interaction(episode$waryear, episode$atopid))]
 
################################################### 
###### Insert capability data #######
 

## calculate each state's share of power within alliance
reald_syinfo= reald_syinfo %>% group_by(atopid, year) %>% 
  mutate(sumcinc_alliance= sum(cap_1)) 
 
# now calculate share 
reald_syinfo$allshare= 
  reald_syinfo$cap_1/reald_syinfo$sumcinc_alliance *100
 
# now average cinc score of ally per alliance-year
reald_syinfo$nally = reald$nally[match(reald_syinfo$atopid, reald$atopid)]
reald_syinfo= reald_syinfo %>% group_by(atopid, year) %>% 
  mutate(av_allycinc= sumcinc_alliance/nally)
reald_syinfo$av_allycinc %>% summary

# export to reald_exp
reald_exp$sumcinc_alliance= reald_syinfo$sumcinc_alliance[match(
  interaction(reald_exp$atopid, reald_exp$year), interaction(reald_syinfo$atopid, reald_syinfo$year)
)]
reald_exp$av_allycinc= reald_syinfo$av_allycinc[match(
  interaction(reald_exp$atopid, reald_exp$year), interaction(reald_syinfo$atopid, reald_syinfo$year)
)]


## getting max ally cinc score
reald_syinfo= reald_syinfo %>% group_by(atopid, year) %>% 
  mutate(maxallycinc= max(cap_1, na.rm=T),
         minallycinc= min(cap_1, na.rm=T)) 
reald_syinfo$maxallycinc[reald_syinfo$maxallycinc== -Inf] <- NA
reald_syinfo$minallycinc[reald_syinfo$minallycinc== Inf] <- NA
 
 
## get the mean of alliance share (for now) for each state per alliance duration
reald_syinfo= reald_syinfo %>% group_by(atopid) %>%
  mutate(
    maxally_mean= mean(maxallycinc, na.rm =T),
    minally_mean= mean(minallycinc, na.rm=T)) 

## Most powerful ally 
reald_syinfo$maxallycode=ifelse (reald_syinfo$cap_1== reald_syinfo$maxallycinc, 
                                 reald_syinfo$ccode1, NA)
 reald_syinfo= reald_syinfo %>% group_by(atopid, year) %>% 
  fill(maxallycode, .direction= 'up') %>% 
  fill(maxallycode)
table(reald_syinfo$maxallycode) #looks right
reald_exp$maxallycode= reald_syinfo$maxallycode[match(
  interaction(reald_exp$atopid, reald_exp$year), interaction(reald_syinfo$atopid, reald_syinfo$year)
)]


nmc <- read_csv("NMC_5_0.csv")
nmc= nmc %>% group_by(ccode) %>% mutate(
  lcinc= Lag(cinc, shift=1)
)

reald_exp$lmaxallycinc =nmc$lcinc[match(
  interaction(reald_exp$maxallycode, reald_exp$year), interaction(nmc$ccode, nmc$year)
)]  

# now merge with data (reald)
 reald$maxally_mean = reald_syinfo$maxally_mean[match(reald$atopid, reald_syinfo$atopid)]
reald_exp$maxally_mean = reald_syinfo$maxally_mean[match(reald_exp$atopid, reald_syinfo$atopid)]
reald_exp$maxallycinc= reald_syinfo$maxallycinc[match(
  interaction(reald_exp$atopid, reald_exp$year), interaction(reald_syinfo$atopid, reald_syinfo$year)
)]
 
reald$minally_mean = reald_syinfo$minally_mean[match(reald$atopid, reald_syinfo$atopid)]
reald_exp$minally_mean = reald_syinfo$minally_mean[match(reald_exp$atopid, reald_syinfo$atopid)]
reald_exp$minallycinc= reald_syinfo$minallycinc[match(
  interaction(reald_exp$atopid, reald_exp$year), interaction(reald_syinfo$atopid, reald_syinfo$year)
)]
# reald$avally_mean = reald_exp$avally_mean[match(reald$atopid, reald_exp$atopid)]
reald_exp$minallycinc= reald_syinfo$minallycinc[match(
  interaction(reald_exp$atopid, reald_exp$year), interaction(reald_syinfo$atopid, reald_syinfo$year)
)]

  

#### defender attacker ##### 

reald_exp$defcinc= episode$defcinc[match(
  interaction(reald_exp$atopid, reald_exp$year), interaction(episode$atopid, episode$waryear)
)]
 
reald_exp$attcinc= episode$attcinc[match(
  interaction(reald_exp$atopid, reald_exp$year), interaction(episode$atopid, episode$waryear)
)]
 
#####  Strongest Power not challenged ####

reald_syinfo= reald_syinfo %>% group_by(atopid, year) %>% 
  mutate(secallycinc= max( cap_1[cap_1!=maxallycinc], na.rm = T ))
reald_syinfo$secallycinc[reald_syinfo$secallycinc== -Inf] <- NA
reald_syinfo= reald_syinfo  %>% group_by(atopid) %>% mutate(
  lsecallycinc= Lag(secallycinc, shift = 1))
 
reald_exp$secallycinc= reald_syinfo$secallycinc[match(
  interaction(reald_exp$atopid, reald_exp$year), interaction(reald_syinfo$atopid, reald_syinfo$year)
)]
reald_exp$lsecallycinc= reald_syinfo$lsecallycinc[match(
  interaction(reald_exp$atopid, reald_exp$year), interaction(reald_syinfo$atopid, reald_syinfo$year)
)]

reald_exp$nondef_maxcinc= ifelse(reald_exp$maxallycinc==reald_exp$defcinc, 
                                 reald_exp$secallycinc, reald_exp$maxallycinc)
reald_exp$nondef_maxcinc %>% summary #okay
 
######## power change in alliances ######

reald_exp= reald_exp %>%group_by(atopid) %>%  mutate(
  initpower= first(maxallycinc),
  pchange_ten= ifelse(max(maxallycinc, na.rm=T)> initpower*1.1 | 
                        min(maxallycinc, na.rm=T)< initpower*0.9, 1,0),
  pchange_thirty= ifelse(max(maxallycinc, na.rm=T)> initpower*1.3 | 
                           min(maxallycinc, na.rm=T)< initpower*0.7, 1,0)
  
) 

reald$pchange_ten = reald_exp$pchange_ten[match(reald$atopid, reald_exp$atopid)]
reald$pchange_thirty = reald_exp$pchange_thirty[match(reald$atopid, reald_exp$atopid)]
reald$initpower = reald_exp$initpower[match(reald$atopid, reald_exp$atopid)]
 
###################################################
######Interest Heterogeneity Index Making ########

# reald_dyadinfo
reald_dyadinfo= merge(reald_dyad, playd, by= c('year', 'ccode1', 'ccode2'), all.x = T)
dim(reald_dyadinfo) #122038     32 okay 

# cleaning missing values
reald_dyadinfo$s_un_glo[reald_dyadinfo$s_un_glo==-9]<- NA
reald_dyadinfo$s_wt_glo[reald_dyadinfo$s_wt_glo==-9]<- NA
reald_dyadinfo$polity21[reald_dyadinfo$polity21==-99] <- NA
reald_dyadinfo$polity22[reald_dyadinfo$polity22==-99] <- NA
reald_dyadinfo$cwnmmdal[reald_dyadinfo$cwnmmdal==-9]<- NA
reald_dyadinfo$flow1[reald_dyadinfo$flow1==-9]<- NA
reald_dyadinfo$flow2[reald_dyadinfo$flow2==-9]<- NA

reald_dyadinfo= reald_dyadinfo %>% mutate(
  politdif= abs(polity21-polity22)
)
summary(reald_dyadinfo$polity21) #cleaned, except military and population and sorts 
 
 
#get info from dyad dataset to merge into reald 
allydat= reald_dyadinfo %>% group_by(atopid) %>% summarize(sunmean = mean(s_un_glo, na.rm= T),
                                                           swmean= mean(s_wt_glo, na.rm= T), 
                                                           sunvar= var(s_un_glo, na.rm= T),
                                                           swvar= var(s_wt_glo, na.rm= T),
                                                           nmidmean= mean(cwnmmdal, na.rm= T),
                                                           pdifmean= mean(politdif, na.rm=T),
                                                           pdifvar= var(politdif, na.rm=T))
dim(allydat) #274   9 good
allydat %>% summary()
t=reald_dyadinfo %>% group_by(atopid, year) %>% summarize(
  nmidmeanyear= mean(cwnmmdal, na.rm=T) 
)

aa= reald_syinfo %>% group_by(atopid) %>% summarize(politymean= mean(polity21, na.rm=T))
reald$politymean= aa$politymean
reald_exp$politymean= reald$politymean[ match(reald_exp$atopid, reald$atopid)]
reald$pdifmean= allydat$pdifmean[ match(reald$atopid, allydat$atopid)]
reald$nmidmean= allydat$nmidmean[ match(reald$atopid, allydat$atopid)]
 
 
###### Number of Alliance #####
atopsy <- read_csv("atop4_0sy.csv")
dim(atopsy) # 11485
atopsy= atopsy %>% filter(defense==1)
dim(atopsy) #8589

allymat= atopsy[ , 11:63] %>% as.matrix()

store= c()
for (i in 1:nrow(allymat)){
  lv= allymat[i,] %>% as.vector()
  store[i] = max(which(!is.na(lv)))
  
}
# merge to reald
atopsy$nalliance = store
 
## calculate mean amount of alliances per year, including 0 
nmc$nalliance= atopsy$nalliance[match(interaction(nmc$ccode, nmc$year),
                                        interaction(atopsy$state, atopsy$year))]
nmc$nalliance %>% table (useNA= 'ifany')
nmc$nalliance2= ifelse(is.na(nmc$nalliance)==T, 0, nmc$nalliance)
nmc$nalliance2 %>% table (useNA= 'ifany')#okay
nmc= nmc %>% arrange(ccode, year)
nmc$year %>% class
tapply(nmc$nalliance2, nmc$year, mean)
nmc= nmc %>% mutate(
  post45= ifelse(year>1944, 1, 0)
)
# matrix with mean amount of alliances per state 
mat= nmc %>% group_by(year) %>% summarise(mean= mean(nalliance, na.rm=T))



##### Level of institutionalization #####
reald= reald %>% mutate(
  milinst= ifelse(milcon==3| intcom==1| base>0, 1,0),
  milinst= ifelse(milcon<3 & is.na(intcom)==T & is.na(base)==T, 0, milinst)
)
reald$milinst %>% table(useNA= 'ifany')   
 
reald_exp$milinst= reald$milinst[ match(reald_exp$atopid, reald$atopid)]
reald_exp$milinst %>% table(useNA= 'ifany')
 
ss= reald_dyadinfo %>% group_by(atopid, year) %>% summarize(alypdmean= mean(politdif, na.rm=T))
reald_exp$pdifyr= ss$alypdmean[match(interaction(reald_exp$atopid, reald_exp$year),
                                     interaction(ss$atopid, ss$year))]
 
ss= reald_syinfo %>% group_by(atopid, year) %>% summarize(politymeanyear= mean(polity21, na.rm=T))
reald_exp$politymeanyear= ss$politymeanyear[match(interaction(reald_exp$atopid, reald_exp$year),
                                                  interaction(ss$atopid, ss$year))]




reald_exp$nmidmeanyear= t$nmidmeanyear[match(interaction(reald_exp$atopid, reald_exp$year),
                                             interaction(t$atopid,t$year))]
reald_exp$nmidmeanyear
reald_exp$nmidmean= reald$nmidmean[match(reald_exp$atopid, reald$atopid)]

reald$honored= ifelse(reald$violation==0, 1,0)
reald_exp$honored= ifelse(reald_exp$violation==0,1,0)
reald_exp$honored_all= ifelse(reald_exp$violation_all==0,1,0)
reald_exp$honored %>% table (useNA= 'ifany')#14
reald_exp$honored_all %>% table (useNA= 'ifany')#16
episode$honored= ifelse(episode$violation==0,1,0)
episode$honored %>% table #16
  
# write.csv(reald, 'reald.csv')
# write.csv(reald_exp, 'reald_exp2022.csv')

### subset data to appropriate time frame
reald_exp = reald_exp %>% filter(year >=1816 & year<2011)

#########################
##### War variables #####
#########################

# bring in war data
war <- read_csv("Inter-StateWarData_v4.0.csv")
war %>% dim #337  25
war= war %>% mutate(
  StartYear1= StartYear1 %>% as.numeric(),
  StartMonth1= StartMonth1 %>% as.numeric(),
  StartDay1= StartDay1 %>% as.numeric(),
  initwar= StartYear1*1000+StartMonth1*100+StartDay1
)
war= war%>% arrange(WarNum) %>%group_by(WarNum) %>% mutate(
  joinrank= rank(initwar, ties.method= "min"),
  originator= ifelse(joinrank==1, 1, 0),
  joiner= ifelse(joinrank!=1, 1, 0)
) 
### Bringing in War data without originators 
### who were not main attackers or defenders
### formatted from war data above

warorig2 <- read_csv("warorig2.csv")
warorig2$Attacker %>% table
warorig2= warorig2 %>% group_by(WarNum) %>% mutate(
  attcinc= ifelse(Attacker==1, lagcinc, NA),
  defcinc= ifelse(Attacker==2, lagcinc, NA)
)

#now match these back into war data
attvec= cbind.data.frame(warorig2$WarNum, warorig2$attcinc)
attvec= na.omit(attvec)
colnames(attvec)= c('WarNum', 'attcinc')
war$attcinc= attvec$attcinc[match(war$WarNum, attvec$WarNum)]

defvec= cbind.data.frame(warorig2$WarNum, warorig2$defcinc)
defvec= na.omit(defvec)
colnames(defvec)= c('WarNum', 'defcinc')
war$defcinc= defvec$defcinc[match(war$WarNum, defvec$WarNum)]

war= war %>% group_by(WarNum) %>% mutate(
  defatt= defcinc/(defcinc+ attcinc)
)

# doing the same for ccode of defender and attacker

warorig2$Attacker %>% table
warorig2= warorig2 %>% group_by(WarNum) %>% mutate(
  attcinc= ifelse(Attacker==1, lagcinc, NA),
  defcinc= ifelse(Attacker==2, lagcinc, NA)
)
warorig2= warorig2 %>% group_by(WarNum) %>% mutate(
  defatt= defcinc/(defcinc+ attcinc)
)
 
war= war %>% mutate(
  post45= ifelse(StartYear1>1944, 1, 0)
)
## slicing data to contain only unique war cases
waruniq= war %>% group_by(WarNum) %>% slice(1) 
dim(waruniq)
## bringing back formatted data of waruniq
## coded to show bilateral and multilateral wars
warcase <- read_csv("warcase.csv")

############################################
######## Data Analysis start ###############
######### Main Paper results ###############
############################################
 
### Tables replication ####
##### Table 1 ######

a= lm(challreal~ maxallycinc, data=reald_exp)
b=lm(challreal~ maxallycinc+nally+
       pdifyr+politymeanyear+milinst+pchange_thirty,data=reald_exp)
g=lm(honored_real~ nondef_maxcinc ,data=reald_exp) 
h=lm(honored_real~ nondef_maxcinc+nally+pdifyr+ 
       politymeanyear+milinst+pchange_thirty,data=reald_exp) 
a1= coeftest(a, 
             vcov. = function(x) vcovHC(x, cluster= "atopid", type = "HC1"))
b1= coeftest(b, 
             vcov. = function(x) vcovHC(x, cluster= "atopid", type = "HC1"))

g1= coeftest(g, 
             vcov. = function(x) vcovHC(x ,cluster= "atopid",  type = "HC1"))

h1= coeftest(h, 
             vcov. = function(x) vcovHC(x , cluster= "atopid", type = "HC1"))

stargazer(a1,b1,  g1,h1,  
          dep.var.labels.include = T,
          dep.var.labels = c('Challenges', 'Honor|Challenge'),
          covariate.labels = c(maxallycinc='Ally Capability',
                               
                               nally= 'Number of Allies', 
                               pdifyr= 'Polity Difference',
                               politymeanyear='Average Alliance Polity ',
                               milinst= 'Military Institutionalization',
                               pchange_thirty= 'Capability Change'
                               # capratio2= 'Def/Att Capratio'
          ),
          no.space = T)
## note for replication: 
## the (nondef_maxcinc) variable for Models 3 and 4
## are captioned as "Ally Capability" in the main text 
sd(reald_exp$maxallycinc, na.rm = T) #0.077
 
 
##### Table 2  ########


a= lm(hondummy_real~maxallycinc+ I(maxallycinc^2), reald_exp)
b=lm(hondummy_real~maxallycinc+ I(maxallycinc^2)+nally+pdifyr+politymeanyear +milinst+pchange_thirty, reald_exp)
c= glm(hondummy_real~maxallycinc+ I(maxallycinc^2), reald_exp,  family = binomial(link = 'logit'))
d=glm(hondummy_real~maxallycinc+ I(maxallycinc^2)+nally+pdifyr+politymeanyear +milinst+pchange_thirty, reald_exp,
      family = binomial(link = 'logit'))
a1= coeftest(a, 
             vcov. = function(x) vcovHC(x, cluster= "atopid", type = "HC1"))
b1= coeftest(b, 
             vcov. = function(x) vcovHC(x, cluster= "atopid", type = "HC1"))
c1= coeftest(c, 
             vcov. = function(x) vcovHC(x, cluster= "atopid", type = "HC1"))
d1= coeftest(d, 
             vcov. = function(x) vcovHC(x, cluster= "atopid", type = "HC1"))



stargazer(a1,b1,c1,d1,
          dep.var.labels = 'Honor',
          covariate.labels = c(
            maxallycinc= 'Ally Capability',
            maxallycinc2= 'Delta P2',
            nally= 'Number of Allies',
            pdifyr= 'Polity Difference',
            politymeanyear='Average Alliance Polity',
            milinst='Military Institutionalization',
            pchange_thirty= 'Capability Change'), no.space = T) 

## results for wald test  values 
e=  lm(hondummy_real~maxallycinc+nally+ pdifyr+politymeanyear+
         milinst+pchange_thirty, reald_exp)
f=  lm(hondummy_real~maxallycinc, reald_exp)
g= glm(hondummy_real~ maxallycinc , reald_exp,  family = binomial(link = 'logit'))
h=glm(hondummy_real~maxallycinc+nally+ pdifyr+politymeanyear+
        milinst+pchange_thirty, reald_exp, family = binomial(link = 'logit'))

waldtest(a,f)
waldtest(b,e)
lrtest(c,g)
lrtest(d,h)



######## Table 3 ############
q=lm(multilat~ post45, data= reald)
w=lm(nally~ post45, data= reald) 
r=lm(pdifmean~ post45, data= reald)
s= lm(pchange_thirty ~post45, data = reald) 
y=lm(milinst~ post45, data= reald)
u=lm(multi~ post45, data= warcase)
i=lm(defatt ~ post45, data = waruniq)
o= lm(politymean~ post45, data=reald)

stargazer(q,w,r,y, 
          dep.var.labels = c('Multilateral', 'Members per alliance',
                             'Polity Difference b/w Allies','Military Institutionalization'),
          no.space = T)
stargazer(o, u,i,s,
          dep.var.labels = c('Average Polity',  
                             'Multi War','Def/Att capratio', 'Capability Change'),
          no.space = T)

post45= reald %>% filter(reald$post45==1)
pre45= reald %>% filter(reald$post45==0)
 

############################## 
######## Figures #############

##### Figure 1 #####
# density graph
a= reald$maxally_mean %>% quantile (na.rm=T) %>% nth(4)
b= reald$maxally_mean %>% quantile (na.rm=T) %>% nth(2)
pre45<- reald %>% filter(begyr<1945)
post45<- reald %>% filter(begyr>=1945)
ggplot(data = reald , aes(x= maxally_mean,  
                          group=as.factor(post45), fill= as.factor(post45)))+
  geom_density(alpha=0.55, aes (linetype= as.factor(post45)))+
  scale_linetype_manual(name="Period",values=c(2,1),
                        labels=c('Pre45', 'Post45'))+
  scale_fill_manual(values= c('white', 'grey'),
                    labels=c('Pre45', 'Post45'),
                    name= 'Period')+
  xlab('Strongest ally capability per alliance')+
  ylim(0,20)+
  geom_vline(xintercept= mean(reald$maxally_mean, na.rm = T), linetype=3)+
  # geom_vline(xintercept= mean(pre45$maxally_mean, na.rm = T), linetype=3)+
  # geom_vline(xintercept= mean(post45$maxally_mean, na.rm = T), linetype=3)+
  geom_vline(xintercept= c(a,b), linetype=3)+  
  annotate("text", x= 0.08, y= 19.5, label= 'Average')+
  annotate("text", x= 0.15, y= 19.5, label= '3rd quartile')+
  annotate("text", x= 0.02, y= 19.5, label= '1st quartile')+
  theme(legend.position='bottom')
# 
# ggsave('polar1.png')


##### Figure 4 ####


reald_exp<- reald_exp %>% mutate(
  honored_real_graph = case_when(honored_real ==1 ~ 2 ,
                                 honored_real ==0 ~ 1 ,
                                 T ~0)
)

pre45chal= reald_exp %>% filter(post45==0 & challreal==1)
post45chal= reald_exp %>% filter(post45==1 & challreal==1)

ggplot(data = reald_exp, aes(x=year, y= maxallycinc))+
  geom_point(aes( color= as.factor(honored_real_graph), shape= as.factor(honored_real_graph)),
             position=position_jitter(h=0.035, set.seed(2022)), size=2)+
  geom_vline(xintercept = 1945,  lwd=1, lty=2)+ 
  geom_smooth(aes(x=year, y= maxallycinc),na.rm=T, data = pre45chal, 
              color= 'black', linetype=1, lwd=0.8, se=F, method='loess', span=.9)+
  geom_smooth(aes(x=year, y= maxallycinc), data = post45chal,
              color= 'black', linetype=1, lwd=0.8, se=F, method='loess', span=.9)+
  ylab('Ally capability (CINC)')+
  xlab('Year')+
  annotate("text", x= 1952, y= 0.35, label= '1945')+
  scale_color_manual(name= 'Observed Outcome',
                     labels= c('2'= 'Honored', '1'= 'Violated','0'= 'Unchallenged'),
                     values= c(alpha('black',0.1), 'firebrick1', 'blue2'))+ 
  scale_shape_manual(name= 'Observed Outcome',
                     labels= c('2'= 'Honored', '1'= 'Violated','0'= 'Unchallenged'),
                     values= c(20,15, 8 ))+ 
  theme(legend.position = 'bottom',
        # = c(0.31, 0.93), 
        legend.background = element_rect(color = "black", 
                                         fill = "grey90"),
        legend.direction = "horizontal")
# ggsave(       filename= 'polar3-1.png', width=7.7, height=4.7)

##### Figure 5 ######

##regressions  for Table 2 used to generate predicted values
b=lm(hondummy_real~maxallycinc+ I(maxallycinc^2)+nally+pdifyr+politymeanyear +milinst+pchange_thirty, reald_exp)
d=glm(hondummy_real~maxallycinc+ I(maxallycinc^2)+nally+pdifyr+politymeanyear +milinst+pchange_thirty, reald_exp,
      family = binomial(link = 'logit'))

##code for plot

newdat= seq(0,0.3,0.001) %>% as.data.frame()

newdat= newdat %>% mutate(
  nally= mean(reald_exp$nally),
  pdifyr= mean(reald_exp$pdifyr, na.rm=T),
  politymeanyear= mean(reald_exp$politymeanyear, na.rm=T),
  milinst= mean(reald_exp$milinst, na.rm=T),
  pchange_thirty= mean(reald_exp$pchange_thirty, na.rm=T)
)
colnames(newdat)= c('maxallycinc', 'nally','pdifyr', 'politymeanyear', 'milinst', 'pchange_thirty')
predval_ols= predict(b, type = 'response', newdata = newdat, 
                     interval = 'confidence')  %>% as.data.frame()
predval_ols$maxallycinc=seq(0,0.3,0.001)
newdat$olsfit = predval_ols$fit
newdat$olsupr = predval_ols$upr
newdat$olslwr = predval_ols$lwr



predval_glm= predict(d, type= 'link', newdata = newdat, se.fit = T)
cv= qt(0.975,df= d$df.null)
upr <- predval_glm$fit + (cv * predval_glm$se.fit)
lwr <- predval_glm$fit - (cv* predval_glm$se.fit)
fit <- predval_glm$fit
fit2 <- d$family$linkinv(fit)
upr2 <- d$family$linkinv(upr)
lwr2 <- d$family$linkinv(lwr)

newdat$lwr <- lwr2 
newdat$upr <- upr2 
newdat$predval_glm <-predval_glm$fit

ggplot(data=newdat  ) + 
  geom_line(aes(x=maxallycinc,y=fit2, color='green',linetype= 2), lwd=1.1, linetype= 2) +         
  geom_ribbon(aes(x=maxallycinc, ymin= lwr, ymax=upr), 
              fill= 'darkgrey', alpha=.3)+
  geom_line( mapping=aes(x=maxallycinc, y=olsfit, color='red',linetype= 1), lwd=1.1, linetype=1)+ 
  geom_ribbon(aes(x=maxallycinc, ymin= olslwr, ymax=olsupr), 
              fill= 'darkgrey', alpha=.4)+
  geom_rug(data= reald_exp,aes(maxallycinc))+
  xlim(0,0.26)+ ylim(-0.005,0.026)+
  geom_hline(yintercept = 0, lty=3, lwd=.5)+
  xlab('Ally Capability')+
  ylab('Predicted Pr(Observed Compliance)')+
  scale_color_manual(name= 'Model', label= c('OLS', 'Logit'),
                     values= c('black','black'))+
  scale_linetype_manual(name= 'Model', label= c('OLS', 'Logit'),
                        values= c(1,2))+
  geom_vline(xintercept= newdat[which.max(newdat$olsfit),1], lty=2)+
  geom_vline(xintercept= newdat[which.max(newdat$predval_glm),1], lty=2)+
  theme(legend.position = 'none')
# ggsave('predhon.png')



##########################
### Appendix results #####
##########################

######## Appendix B ##########
##### Table B1 ####

## Crosstab results for Leeds et al. and BF come from their respective papers
## Below are the replication codes for rows 3-4
## reasons each indicate justification for exclusion (more in main text)
## reason 1 refers to alliances formed during war
## reason 2 refers to within-alliance violations 
## reason 3 refers to stricter interpretation of casus foederis
## reason 4 is Other (usually related to differences in how the attacker and defender is defined in COW)

pre45ep= episode %>% filter(post45==0)
post45ep= episode %>% filter(post45==1)

pre45ep %>% filter (reason!=1 ) %>% pull(formerviol) %>% table
pre45ep %>% filter (reason!=2  ) %>% pull(formerviol) %>% table
pre45ep %>% filter (reason!=3 ) %>% pull(formerviol) %>% table
pre45ep %>% filter (reason!=1 & reason !=2 & reason!=3 & reason !=4) %>%
  pull(formerviol) %>% table

post45ep %>% filter (reason!=1 ) %>% pull(formerviol) %>% table
post45ep %>% filter (reason!=2  ) %>% pull(formerviol) %>% table
post45ep %>% filter (reason!=3 ) %>% pull(formerviol) %>% table
post45ep %>% filter (reason!=1 & reason !=2 & reason!=3 & reason !=4) %>%
  pull(formerviol) %>% table
 
##### Table B2 ####
## Again, data for Leeds et al and BF are taken from their articles/ data
## Below is the replication code for Author's results (3rd row of Table)

episode %>% filter(post45==0) %>% group_by(atopid) %>% 
  slice(1) %>% pull(challreal) %>% table
episode %>% filter(post45==1) %>% group_by(atopid) %>% 
  slice(1) %>% pull(challreal) %>% table



### Appendix D ########
##### Table D1 (logit) ########

a= glm(challreal~ maxallycinc, data=reald_exp, family = 'binomial')
b=glm(challreal~ maxallycinc+nally+
       pdifyr+politymeanyear+milinst+pchange_thirty,data=reald_exp, family = 'binomial')
c=glm(honored_real~ nondef_maxcinc ,data=reald_exp, family = 'binomial')
d=glm(honored_real~ nondef_maxcinc+nally+pdifyr+ 
       politymeanyear+milinst+pchange_thirty,data=reald_exp, family = 'binomial')

 
a1= coeftest(a, 
             vcov. = function(x) vcovHC(x, cluster= "atopid", type = "HC1"))
b1= coeftest(b, 
             vcov. = function(x) vcovHC(x, cluster= "atopid", type = "HC1"))
c1= coeftest(c, 
             vcov. = function(x) vcovHC(x , type = "HC1"))
d1= coeftest(d, 
             vcov. = function(x) vcovHC(x, type = "HC1")) 

stargazer(a1,b1, c1 ,d1,
          dep.var.labels.include = T,
          dep.var.labels = c('Challenges', 'Honor|Challenge'),
          covariate.labels = c(maxallycinc='Strongest Ally',
                               nally= 'Number of Allies', 
                               pdifyr= 'Polity Difference',
                               politymeanyear='Average Alliance Polity ',
                               milinst= 'Military Institutionalization',
                               pchange_thirty= 'Capability Change'  
          ),
          no.space = T)
## again nondef maxcinc is noted as Ally Capability (strongest ally) in the table
stargazer(a,b,c,d,  no.space = T)



##### Table D2 (heckman) ######
library(sampleSelection)
h= selection(challreal ~ maxallycinc +nally+pdifyr+  milinst+pchange_thirty, 
             honored_real~maxallycinc+capratio2  , data = reald_exp) 

stargazer(h, selection.equation = T,
          dep.var.labels = 'Challenges',         no.space = T)
stargazer(h, selection.equation = F,
          dep.var.labels = 'Honor|Challenge',        no.space = T)

##### Table D3 (using alternate ally capability measures Table 1) #######

a= lm(challreal~ sumcinc_alliance, data=reald_exp)
b=lm(challreal~ sumcinc_alliance+nally+
       pdifyr+politymeanyear+milinst+pchange_thirty,data=reald_exp)

c=lm(challreal~ av_allycinc, data=reald_exp)
d=lm(challreal~ av_allycinc+nally+pdifyr+politymeanyear+
       milinst+pchange_thirty,data=reald_exp)
e=lm(honored_real~ sumcinc_alliance, data=reald_exp)
f=lm(honored_real~ sumcinc_alliance+nally+pdifyr+politymeanyear+
       milinst+pchange_thirty,data=reald_exp)
g=lm(honored_real~ av_allycinc, data=reald_exp)
h=lm(honored_real~ av_allycinc+nally+pdifyr+politymeanyear+
       milinst+pchange_thirty,data=reald_exp)


a1= coeftest(a, 
             vcov. = function(x) vcovHC(x, cluster= "atopid", type = "HC1"))
b1= coeftest(b, 
             vcov. = function(x) vcovHC(x, cluster= "atopid", type = "HC1"))
c1= coeftest(c, 
             vcov. = function(x) vcovHC(x, cluster= "atopid", type = "HC1"))
d1= coeftest(d, 
             vcov. = function(x) vcovHC(x, cluster= "atopid", type = "HC1"))

e1= coeftest(e, 
             vcov. = function(x) vcovHC(x , cluster= "atopid", type = "HC1"))

f1= coeftest(f, 
             vcov. = function(x) vcovHC(x ,cluster= "atopid", type = "HC1"))
g1= coeftest(g, 
             vcov. = function(x) vcovHC(x , cluster= "atopid",type = "HC1"))
h1= coeftest(h, 
             vcov. = function(x) vcovHC(x , cluster= "atopid",type = "HC1"))


stargazer(a1, b1, c1,d1, e1, f1,  g1, h1,
          dep.var.labels.include = T,  
          no.space = T)

stargazer(a,b, c,d,e,f, g,h,   no.space = T)

##### Table D4 (using alternate ally capability measures Table 2-1) #######
 

a= lm(hondummy_real~av_allycinc+ I(av_allycinc^2), reald_exp)
b= lm(hondummy_real~av_allycinc+ I(av_allycinc^2)+nally+pdifyr+ politymean+milinst+pchange_thirty, reald_exp)
c= glm(hondummy_real~av_allycinc+ I(av_allycinc^2), reald_exp,  family = binomial(link = 'logit'))
d=glm(hondummy_real~av_allycinc+ I(av_allycinc^2)+nally+ pdifyr+ politymean +milinst+pchange_thirty, reald_exp,
      family = binomial(link = 'logit'))


a1= coeftest(a,
             vcov. = function(x) vcovHC(x, cluster= "atopid", type = "HC1"))
b1= coeftest(b,
             vcov. = function(x) vcovHC(x, cluster= "atopid", type = "HC1"))
c1= coeftest(c,
             vcov. = function(x) vcovHC(x, cluster= "atopid", type = "HC1"))
d1= coeftest(d,
             vcov. = function(x) vcovHC(x, cluster= "atopid", type = "HC1"))

stargazer(a,b,c,d,
          dep.var.labels = 'Honor',
          covariate.labels = c(
            maxally_mean= 'Ally Capability',
            maxally_mean2= 'Ally Capbility^2',
            nally= 'Number of Allies',
              pdifyr= 'Polity Difference',
            politymean='Average Alliance Polity',
            milinst='Military Institutionalization',
            pchange_thirty= 'Capability Change'),
          no.space = T, single.row = T)
  

##### Table D5 (using alternate ally capability measures Table 2-2) #######


a= lm(hondummy_real~sumcinc_alliance+ I(sumcinc_alliance^2), reald_exp)
b= lm(hondummy_real~sumcinc_alliance+ I(sumcinc_alliance^2)+nally+pdifyr+ politymean+milinst+pchange_thirty, reald_exp)
c= glm(hondummy_real~sumcinc_alliance+ I(sumcinc_alliance^2), reald_exp,  family = binomial(link = 'logit'))
d=glm(hondummy_real~sumcinc_alliance+ I(sumcinc_alliance^2)+nally+pdifyr+ politymean +milinst+pchange_thirty, reald_exp,
      family = binomial(link = 'logit'))

 stargazer(a,b,c,d,
          dep.var.labels = 'Honor',
          covariate.labels = c(
            maxally_mean= 'Ally Capability',
            maxally_mean2= 'Ally Capbility^2',
            nally= 'Number of Allies', 
            pdifyr= 'Polity Difference',
            politymean='Average Alliance Polity',
            milinst='Military Institutionalization',
            pchange_thirty= 'Capability Change'),
          no.space = T, single.row = T)


##### Table D6 (Table 2 with ATOP as UoA) ##########
 
a= lm(hondummy_real~maxally_mean+ I(maxally_mean^2), reald)
b= lm(hondummy_real~maxally_mean+ I(maxally_mean^2)+nally+ politymean+milinst+pchange_thirty, reald)
c= glm(hondummy_real~maxally_mean+ I(maxally_mean^2), reald,  family = binomial(link = 'logit'))
d=glm(hondummy_real~maxally_mean+ I(maxally_mean^2)+nally+ +politymean +milinst+pchange_thirty+politymean, reald,
      family = binomial(link = 'logit'))


a1= coeftest(a, 
             vcov. = function(x) vcovHC(x, cluster= "atopid", type = "HC1"))
b1= coeftest(b, 
             vcov. = function(x) vcovHC(x, cluster= "atopid", type = "HC1"))
c1= coeftest(c, 
             vcov. = function(x) vcovHC(x, cluster= "atopid", type = "HC1"))
d1= coeftest(d, 
             vcov. = function(x) vcovHC(x, cluster= "atopid", type = "HC1"))
stargazer(a,b,c,d,
          dep.var.labels = 'Honor',
          covariate.labels = c(
            maxally_mean= 'Ally Capability',
            maxally_mean2= 'Ally Capbility^2',
            nally= 'Number of Allies',
            pdifyr= 'Polity Difference',
            politymean='Average Alliance Polity',
            milinst='Military Institutionalization',
            pchange_ten= 'Capability Change'),
          no.space = T, single.row = T)


### Appendix E ########


#### Table E1 ########
nmc$post45= ifelse(nmc$year>1944, 1, 0)
nmc_pre45= nmc %>% filter(post45==0)
nmc_post45= nmc %>% filter(post45==1)

war_pre45= war %>% filter(post45==0)
war_post45= war %>% filter(post45==1)
warcase_pre45= warcase %>% filter(post45==0)
warcase_post45= warcase %>% filter(post45==1)

pre45= reald_exp %>% filter(post45==0)
post45= reald_exp %>% filter(post45==1)
## table min med max var 
dtable= cbind(pre45$multilat,
              pre45$nally, 
              pre45$nmidmeanyear, 
              pre45$pdifyr, 
              pre45$milinst,
              pre45$pchange_thirty,
              pre45$politymean,
              pre45$maxallycinc
              
)
a=dtable %>% apply(.,2,min, na.rm=T)
b=dtable %>% apply(.,2,median, na.rm=T)
c=dtable %>% apply(.,2,max, na.rm=T)
pre= cbind(a,b,c)


## table min med max var 
dtable= cbind(post45$multilat,
              post45$nally, 
              post45$nmidmeanyear, 
              post45$pdifyr, 
              post45$milinst,
              post45$pchange_thirty,
              post45$politymean,
              post45$maxallycinc
)
a=dtable %>% apply(.,2,min, na.rm=T)
b=dtable %>% apply(.,2,median, na.rm=T)
c=dtable %>% apply(.,2,max, na.rm=T)
post= cbind(a,b,c)
dtab= cbind(pre,post)
dtab %>% xtable


# others just put in manually
## order presented may be different from table in the text
nmc_pre45$nalliance2 %>% min 
nmc_pre45$nalliance2 %>% max
nmc_pre45$nalliance2 %>% median
nmc_post45$nalliance2 %>% min 
nmc_post45$nalliance2 %>% max
nmc_post45$nalliance2 %>% median



warcase_post45$multi %>% median
war_pre45$defatt %>% median
war_pre45$defatt %>% max
war_post45$defatt %>% median
war_post45$defatt %>% max
war_post45$defatt %>% min

#### Figure E1  #####
aa = atopsy %>% group_by(year) %>% summarize(mean= mean(nalliance))
aa = aa %>% filter(year>1815)
aa %>% class

aa$post45 <- ifelse(aa$year >= 1945, 1, 0)

lm(mean ~ post45, data= aa) %>% stargazer

ggplot(data =  aa)+
  geom_line(aes(year, mean))+
  geom_vline(xintercept = 1890,   lwd=1, lty=2)+
  geom_vline(xintercept = 1939,   lwd=1, lty=2 )+
  geom_vline(xintercept = 1914,  lwd=1, lty=2)+
  # ggtitle('Average Number of Alliance per Member during 1815-2003')+
  xlab('Year')+
  ylab('Average Number of Alliance per Member')+
  annotate("text", x= 1909, y= 6, label= '1914')+
  annotate("text", x= 1945, y= 6, label= '1939')+
  annotate("text", x= 1885, y= 6, label= '1890')
# ggsave('nalliance.png')

#### Figure E2####

ggplot(data = war, aes(x=StartYear1, y= defatt))+
  geom_point()+
  geom_text(aes(label= WarName))+
  geom_smooth(method = 'loess')+
  geom_vline(xintercept = 1945, col='black', lwd=1, lty=2)+
  ylab('Capability Ratio of Defender to Attacker')+
  xlab('War year')+
  xlim(c(1825, 2002))+
  ggtitle('Capability Ratio of Defender to Attacker in all wars')+
  annotate("text", x= 1939, y=1.05, label= '1945')



#### Figure E3 #### 

a= reald_exp$maxallycinc %>% quantile (na.rm=T) %>% nth(4)
b= reald_exp$maxallycinc  %>% mean(na.rm=T)
c= reald_exp$maxallycinc  %>% quantile (na.rm=T) %>% nth(2)

ggplot(data = reald_exp , aes(x= maxallycinc,  
                              group=as.factor(post45), fill= as.factor(post45)))+
  geom_density(alpha=0.6, aes (linetype= as.factor(post45)))+
  scale_linetype_manual(name="Period",values=c(2,1),
                        labels=c('Pre45', 'Post45'))+
  scale_fill_manual(values= c('grey', 'skyblue'),
                    labels=c('Pre45', 'Post45'),
                    name= 'Period')+
  xlab('Strongest ally capability per alliance')+  
  geom_vline(xintercept= c(a,b,c), linetype=3)+  
  annotate("text", x= 0.08, y= 19.5, label= 'Average')+
  annotate("text", x= 0.15, y= 19.5, label= '3rd quartile')+
  annotate("text", x= 0.02, y= 19.5, label= '1st quartile')+
  theme(legend.position='bottom')
# ggsave(       filename= 'polar1_allyear.png')


#### Figure E4 #########
chalreal<- reald_exp %>% filter(challreal==1)
reald_exp = reald_exp %>% mutate(
  everhonored = case_when(honored_real ==1 ~ 2,
                          honored_real ==0 ~1 ,
                          T ~0)
)
reald_exp = reald_exp %>% group_by(atopid) %>% mutate(
  everhonored = max(everhonored)
)
ggplot(data = reald_exp, aes(x=year, y= maxallycinc))+
  geom_line(aes(linetype= as.factor(everhonored), color= as.factor(everhonored),
                group=as.factor(atopid)),
            position=position_jitter(width = 0.01, height = 0.03, seed = 111))+
  geom_vline(xintercept = 1945,  lwd=1, lty=2)+
  ylab('Ally capability (CINC)')+
  xlab('Year')+
  annotate("text", x= 1952, y= 0.35, label= '1945')+
  annotate("text", x= 1950, y= 0.21 , label= 'A')+
  scale_color_manual(name= 'Observed Outcome',
                     labels= c('2'= 'Honored', '1'= 'Violated','0'= 'Unchallenged'),
                     values= c(alpha('black',0.1), alpha('black',0.8), alpha('black',0.9)))+ 
  scale_linetype_manual("Observed Outcome",
                        labels= c('2'= 'Honored', '1'= 'Violated','0'= 'Unchallenged'),
                        values=c('solid' ,'solid','dashed'))+ 
  theme(legend.position = 'bottom',
        # = c(0.31, 0.93), 
        legend.background = element_rect(color = "black", 
                                         fill = "grey90"),
        legend.direction = "horizontal")
# ggsave(
  # filename= 'polar3line.png')


############# END OF FILE #################
  